% Data analysis for mySQL final_v4_tbl
% 1998-2009, rating 1-5.5 and default -1
% Take year obeservation with month=12

clc
clear

conn = database('BDC','','');
%table = 'final_v4_tbl';
year_list=1997:2009;
DURATION = 1;
NUMBER_OF_YEAR = length(year_list) - DURATION;
Y = cellstr(char('1998','1999','2000','2001','2002','2003','2004','2005','2006','2007','2008','2009'));
R = cellstr(char('1','1.5','2','2.5','3','3.5','4','4.5','5','5.5'));
SG = cellstr(char('Total sample: [$50k,+)','Subsample A: [$50k, 250k)','Subsample B: [$250k, 1000k)','Subsample C: [$1000k, +)'));
linestyles = cellstr(char('-',':','-.','--','-',':','-.','--','-',':','-',':',...
'-.','--','-',':','-.','--','-',':','-.'));
Markers=['o','+','s','d','*','x','v','^','<','>','p','h','.',...
'+','*','o','x','^','<','h','.','>','p','s','d','v',...
'o','x','+','*','s','d','v','^','<','>','p','h','.'];
S = cellstr(char('Business Services','Construction','Manufacture','Non-Business Services', 'Other',	'Resources','Retail','Supplier Premises','Tourism','Transport+Storage','Wholesale'));
G = cellstr(char('Alberta','British Columbia','Manitoba','New Brunswick','Newfoundland & Labrador','Northwest Territories','Nova Scotia','Ontario','Prince Edward Island','Quebec','Saskatchewan','Yukon'));


%% evolution of rating universe over time-----------------------------------
sqlquery1 = strcat(['Select modiFisYear, count(customer) as records FROM final_v4_tbl_yr_2 Group by modiFisYear']);
results1 = fetch(conn, sqlquery1);
numObligors1 = cell2mat(results1);
figure
bar(numObligors1(1:12,2));
colormap(summer)
set(gca,'XLim',[0,13]);
set(gca,'XTickLabel',Y);
xlabel('Years');
ylabel('Number of Obligor Years');




%% Average Rating Distribution----------------------------------------------
sqlquery2 = strcat(['SELECT modiRate, count(customer) as records FROM final_v4_tbl_yr_2 group by modiRate']);
results2 = fetch(conn, sqlquery2);
numObligors2 = cell2mat(results2);
figure
bar(numObligors2(2:length(numObligors2),2));
colormap(summer)
set(gca,'XTickLabel',R);
xlabel('Rating Category');
ylabel('Number of Obligor Years');

%% Figure 3 Industry Distribution

Sectors = xlsread('D:\My Documents\CIRANO\Writing\Chapter1_2012\data.xlsx','Sector','i5:l15');

figure
bar ( Sectors(:,2:length(SG)) ); colormap(summer);
hold on
plot (Sectors(:,1), [linestyles{3} Markers(1)],'LineWidth',0.01,'MarkerSize',8,'markerfacecolor','r');
%set(gca,'XTickLabel',S);
xticklabel_rotate(1:length(S),45,S);
set(gca, 'YTickMode','manual') % set unit to percentage
set(gca, 'YTickLabel',num2str(100.*get(gca,'YTick')','%g%%')) % set unit to percentage
xlabel('Industry Sectors');
ylabel('Percentage of Obligors');
legend('Subsample A: [$50k, 250k)','Subsample B: [$250k, 1000k)','Subsample C: [$1000k, +)','Total sample: [$50k,+)');
grid on

%% Figure 4 Geography Distribution

Geo = xlsread('D:\My Documents\CIRANO\Writing\Chapter1_2012\data.xlsx','Geo','h4:k15');

figure
bar ( Geo(:,2:length(SG)) ); colormap(summer);
hold on
plot (Geo(:,1), [linestyles{3} Markers(1)],'LineWidth',0.01,'MarkerSize',8,'markerfacecolor','r');
set(gca,'XTickLabel',G);
set(gca,'XLim',[0 length(G)+1]);
xticklabel_rotate(1:length(G),30,G);
set(gca, 'YTickMode','manual') % set unit to percentage
set(gca, 'YTickLabel',num2str(100.*get(gca,'YTick')','%g%%')) % set unit to percentage
xlabel('Provinces');
ylabel('Percentage of Obligors');
legend('Sample A: [$50k, 250k)','Sample B: [$250k, 1000k)','Sample C: [$1000k, +)','Total sample: [$50k, +)');
grid on






